// -*- mode:c++ -*-

// Copyright (c) 2022 PLCT Lab
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Declaration template for a vector memory macro instruction class. The
// class name, the base class and the register index array declaration are
// supplied through the class_name, base_class and reg_idx_arr_decl
// substitutions.
def template VMemMacroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    %(reg_idx_arr_decl)s;
public:
    // Constructor taking only the decoded machine instruction.
    %(class_name)s(ExtMachInst _machInst);
    // Constructor that additionally receives _elen and _vlen.
    %(class_name)s(ExtMachInst _machInst, uint32_t _elen, uint32_t _vlen);
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Same declaration as VMemMacroDeclare, except that the generated class is a
// C++ class template parameterised on ElemType.
def template VMemTemplateMacroDeclare {{

template<typename ElemType>
class %(class_name)s : public %(base_class)s
{
private:
    %(reg_idx_arr_decl)s;
public:
    // Constructor taking only the decoded machine instruction.
    %(class_name)s(ExtMachInst _machInst);
    // Constructor that additionally receives _elen and _vlen.
    %(class_name)s(ExtMachInst _machInst, uint32_t _elen, uint32_t _vlen);
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor template for unit-stride vector load macro instructions.
// The load of vl elements is broken into one micro-op per vector register.
// A single VectorNopMicroInst is emitted instead when vl is zero, and a
// VlFFTrimVlMicroOp is appended when the op class is
// SimdUnitStrideFaultOnlyFirstLoadOp.
def template VleConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Number of EEW-wide elements that fit into one vector register.
    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
    // Micro-ops needed to cover vl elements. Integer ceiling division keeps
    // the computation exact and avoids a round trip through floating point;
    // a non-positive micro_vlmax yields zero micro-ops.
    const uint32_t total_vl = this->vl;
    const uint32_t num_microops = micro_vlmax > 0
        ? (total_vl + micro_vlmax - 1) / micro_vlmax
        : 0;
    int32_t remaining_vl = this->vl;
    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);

    StaticInstPtr microop;

    if (micro_vl == 0) {
        // Nothing to load: the macro-op consists of a single nop micro-op.
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    }
    // The index has the same unsigned type as num_microops so the loop
    // condition does not mix signed and unsigned operands.
    for (uint32_t i = 0; i < num_microops && micro_vl > 0; ++i) {
        microop = new %(class_name)sMicro(_machInst, micro_vl, i, elen, vlen);
        microop->setDelayedCommit();
        microop->setFlag(IsLoad);
        this->microops.push_back(microop);
        // Elements left for the next register, capped at one register.
        remaining_vl -= micro_vlmax;
        micro_vl = std::min(remaining_vl, micro_vlmax);
    }

    if (_opClass == SimdUnitStrideFaultOnlyFirstLoadOp) {
        // Extra trailing micro-op that receives vl, the number of load
        // micro-ops and the list of micro-ops built so far.
        microop = new VlFFTrimVlMicroOp(_machInst, this->vl, num_microops,
                                        elen, vlen, microops);
        this->microops.push_back(microop);
    }

    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Declaration template for the micro-op class of a unit-stride vector load.
def template VleMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // Register id storage. The VleMicroConstructor template registers at
    // most three sources (rs1, vd, v0) and exactly one destination (vd).
    RegId srcRegIdxArr[3];
    RegId destRegIdxArr[1];
public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
        uint32_t _microIdx, uint32_t _elen, uint32_t _vlen);

    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;

};

}};

// Constructor template for a unit-stride vector load micro-op. It registers
// the destination register vd + _microIdx and the source registers the
// micro-op reads.
def template VleMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                               uint32_t _microIdx, uint32_t _elen,
                               uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                   _microIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Destination: the register of the group selected by _microIdx.
    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
    _numTypedDestRegs[VecRegClass]++;
    // Source 0: integer register rs1.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // The destination is also a source when vta is clear, or when the
    // instruction is masked (vm clear) and vma is clear; presumably because
    // undisturbed elements must keep the old register value.
    if (!_machInst.vtype8.vta || (!_machInst.vm && !_machInst.vtype8.vma)) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
    }
    // Masked instructions read v0 as their last source operand.
    if (!_machInst.vm) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
}

}};

// execute() template for a unit-stride vector load micro-op: checks that the
// vector unit is usable, reads microVl elements from memory at EA and runs
// the memacc_code snippet once per element slot of the register.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off or
// vill is set, the memory fault if the read fails, and the final fault value
// otherwise.
def template VleMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear).
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);

    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // Record in the status register that the vector state is now dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // The mask register is the last source operand of a masked micro-op.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // Bytes to read: element width in bytes times the elements of this
    // micro-op.
    uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl;

    // Every byte of the access is enabled.
    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
                              byte_enable);

    %(fault_code)s;

    // Number of elements that fit into one vector register.
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);

    if (fault != NoFault)
        return fault;

    size_t ei;

    // i indexes the element slot inside this register, ei is the element
    // index counted across all registers of the group.
    for (size_t i = 0; i < micro_vlmax; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return fault;
}

}};

// initiateAcc() template for a unit-stride vector load micro-op: performs
// the legality checks and starts the memory read of microVl elements at EA.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off
// or vill is set, otherwise the fault value left after initiateMemRead and
// the fault_code snippet.
def template VleMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{

    Addr EA;

    %(op_src_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // Bytes to read: element width in bytes times the elements of this
    // micro-op.
    uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;

    // Every byte of the access is enabled.
    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
                                  byte_enable);

    %(fault_code)s;

    return fault;
}

}};

// completeAcc() template for a unit-stride vector load micro-op: marks the
// vector state dirty, copies the packet payload into Mem and runs the
// memacc_code snippet once per element slot of the register.
// Always returns NoFault.
def template VleMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
                            trace::InstRecord *traceData) const
{
    %(op_decl)s;
    %(op_rd)s;

    // Record in the status register that the vector state is now dirty.
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // tail/mask policy: both undisturbed if one is, 1s if none
    %(tail_mask_policy_code)s

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // The payload is only copied when the memory access predicate is set.
    if (xc->readMemAccPredicate()) {
        memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
    }

    // Number of elements that fit into one vector register.
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);

    // i indexes the element slot inside this register, ei is the element
    // index counted across all registers of the group.
    size_t ei;
    for (size_t i = 0; i < micro_vlmax; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return NoFault;
}

}};

// Constructor template for unit-stride vector store macro instructions.
// The store of vl elements is broken into one micro-op per vector register;
// a single VectorNopMicroInst is emitted instead when vl is zero.
def template VseConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Number of EEW-wide elements that fit into one vector register.
    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
    // Micro-ops needed to cover vl elements. Integer ceiling division keeps
    // the computation exact and avoids a round trip through floating point;
    // a non-positive micro_vlmax yields zero micro-ops.
    const uint32_t total_vl = this->vl;
    const uint32_t num_microops = micro_vlmax > 0
        ? (total_vl + micro_vlmax - 1) / micro_vlmax
        : 0;
    int32_t remaining_vl = this->vl;
    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);

    StaticInstPtr microop;

    if (micro_vl == 0) {
        // Nothing to store: the macro-op consists of a single nop micro-op.
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    }
    // The index has the same unsigned type as num_microops so the loop
    // condition does not mix signed and unsigned operands.
    for (uint32_t i = 0; i < num_microops && micro_vl > 0; ++i) {
        microop = new %(class_name)sMicro(_machInst, micro_vl, i, elen, vlen);
        microop->setDelayedCommit();
        microop->setFlag(IsStore);
        this->microops.push_back(microop);
        // Elements left for the next register, capped at one register.
        remaining_vl -= micro_vlmax;
        micro_vl = std::min(remaining_vl, micro_vlmax);
    }

    // Same first/last marking calls as the other macro constructors here.
    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Declaration template for the micro-op class of a unit-stride vector store.
def template VseMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // Register id storage. The VseMicroConstructor template registers at
    // most three sources (rs1, vs3, v0) and no destination, hence the
    // zero-length destination array.
    RegId srcRegIdxArr[3];
    RegId destRegIdxArr[0];
public:
    %(class_name)s(ExtMachInst _machInst,
        uint32_t _microVl, uint32_t _microIdx, uint32_t _elen, uint32_t _vlen);

    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor template for a unit-stride vector store micro-op. It registers
// the source registers and flags the micro-op as a vector store.
def template VseMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                               uint32_t _microIdx, uint32_t _elen,
                               uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                    _microIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Source 0: integer register rs1.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // Source 1: the register of the vs3 group selected by _microIdx.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
    // Masked instructions read v0 as their last source operand.
    if (!_machInst.vm) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsVector] = true;
    this->flags[IsStore] = true;
}

}};

// execute() template for a unit-stride vector store micro-op: checks that the
// vector unit is usable, runs the memacc_code snippet for every active
// element and writes microVl elements to memory at EA, enabling only the
// bytes of active elements.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off or
// vill is set, otherwise the result of writeMem.
def template VseMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    // Elements per vector register, element width in bytes and total bytes
    // covered by this micro-op.
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);
    const size_t eewb = width_EEW(machInst.width) / 8;
    const size_t mem_size = eewb * microVl;
    // All bytes start disabled; the loop enables those of active elements.
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        // ei is the element index counted across the register group.
        ei = i + micro_vlmax * microIdx;
        // Unmasked instructions store every element, masked ones only the
        // elements selected by elem_mask.
        if (machInst.vm || elem_mask(v0, ei)) {
            %(memacc_code)s;
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }

    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
                         nullptr, byte_enable);
    return fault;
}

}};

// initiateAcc() template for a unit-stride vector store micro-op. The body
// performs the same steps as the VseMicroExecute template: legality checks,
// the memacc_code snippet for every active element, then a writeMem of
// microVl elements at EA with only the bytes of active elements enabled.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off or
// vill is set, otherwise the result of writeMem.
def template VseMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    Addr EA;

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    // Elements per vector register, element width in bytes and total bytes
    // covered by this micro-op.
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);
    const size_t eewb = width_EEW(machInst.width) / 8;
    const size_t mem_size = eewb * microVl;
    // All bytes start disabled; the loop enables those of active elements.
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        // ei is the element index counted across the register group.
        ei = i + micro_vlmax * microIdx;
        if (machInst.vm || elem_mask(v0, ei)) {
            %(memacc_code)s;
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }

    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
                         nullptr, byte_enable);
    return fault;
}

}};

// completeAcc() template for a unit-stride vector store micro-op. Nothing is
// done with the packet or the execution context; NoFault is always returned.
def template VseMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    return NoFault;
}

}};

// Constructor template for the Vlm macro instruction. The macro-op always
// holds exactly one micro-op: a Vle8_vMicro covering vl rounded up to whole
// groups of eight, or a VectorNopMicroInst when that count is zero.
def template VlmConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // vl divided by eight, rounded up (presumably one mask bit per element,
    // so this is the number of bytes to load).
    int32_t byte_count = (this->vl + 7) / 8;

    StaticInstPtr single_uop;
    if (byte_count != 0) {
        single_uop = new Vle8_vMicro(_machInst, byte_count, 0, elen, vlen);
        single_uop->setDelayedCommit();
        single_uop->setFlag(IsLoad);
    } else {
        // Nothing to load: use a nop micro-op instead.
        single_uop = new VectorNopMicroInst(_machInst);
    }
    this->microops.push_back(single_uop);

    // The only micro-op is both the first and the last one.
    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Constructor template for the Vsm macro instruction. The macro-op always
// holds exactly one micro-op: a Vse8_vMicro covering vl rounded up to whole
// groups of eight, or a VectorNopMicroInst when that count is zero.
def template VsmConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // vl divided by eight, rounded up (presumably one mask bit per element,
    // so this is the number of bytes to store).
    int32_t byte_count = (this->vl + 7) / 8;

    StaticInstPtr single_uop;
    if (byte_count != 0) {
        single_uop = new Vse8_vMicro(_machInst, byte_count, 0, elen, vlen);
        single_uop->setDelayedCommit();
        single_uop->setFlag(IsStore);
    } else {
        // Nothing to store: use a nop micro-op instead.
        single_uop = new VectorNopMicroInst(_machInst);
    }
    this->microops.push_back(single_uop);

    // The only micro-op is both the first and the last one.
    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Constructor template for whole-register vector store macro instructions.
// One store micro-op is created for each of the nf + 1 registers, every one
// of them covering a full register worth of elements.
def template VsWholeConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements that fit into one vector register at the encoded width.
    const int32_t elems_per_reg = vlen / width_EEW(_machInst.width);
    // Number of micro-ops to build, taken from the nf field.
    size_t reg_count = machInst.nf + 1;

    StaticInstPtr store_uop;
    int reg = 0;
    while (reg < reg_count) {
        store_uop = new %(class_name)sMicro(_machInst, elems_per_reg, reg,
                                            elen, vlen);
        store_uop->setDelayedCommit();
        store_uop->setFlag(IsStore);
        this->microops.push_back(store_uop);
        ++reg;
    }

    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Declaration template for the micro-op class of a whole-register vector
// store.
def template VsWholeMicroDeclare {{

class %(class_name)s: public %(base_class)s
{
private:
    // Register id storage. The VsWholeMicroConstructor template registers
    // two sources (rs1, vs3) and no destination, hence the zero-length
    // destination array.
    RegId destRegIdxArr[0];
    RegId srcRegIdxArr[2];
public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _elen, uint32_t _vlen);

    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                        trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor template for a whole-register vector store micro-op. It
// registers rs1 and vs3 + _microIdx as sources and flags the micro-op as a
// vector store. No mask register is registered.
def template VsWholeMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                               uint32_t _microIdx, uint32_t _elen,
                               uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                   _microIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Source 0: integer register rs1.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // Source 1: the register of the vs3 group selected by _microIdx.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
    this->flags[IsVector] = true;
    this->flags[IsStore] = true;
}

}};

// execute() template for a whole-register vector store micro-op: checks that
// the vector unit is usable, runs the memacc_code snippet once per byte of
// the register and writes the register to memory with writeMemAtomicLE.
// Returns an IllegalInstFault when RVV is disabled or the vector unit is
// off, otherwise the result of the write. vill is not checked here.
def template VsWholeMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;


    // One iteration per byte of the register; i is visible to the snippet.
    for (size_t i = 0; i < vlenb; i++) {
        %(memacc_code)s;
    }

    // NOTE(review): size and address are passed as (vlenb, EA) here, while
    // the VsWholeMicroInitiateAcc template passes (EA, vlenb) to
    // writeMemTimingLE. Confirm both orders against the helper signatures.
    Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem),
                                   vlenb, EA, memAccessFlags, nullptr);
    return fault;
}

}};

// initiateAcc() template for a whole-register vector store micro-op: checks
// that the vector unit is usable, runs the memacc_code snippet once per byte
// of the register and issues the write with writeMemTimingLE.
// Returns an IllegalInstFault when RVV is disabled or the vector unit is
// off, otherwise the result of the write. vill is not checked here.
def template VsWholeMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
        trace::InstRecord* traceData) const
{
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;


    // One iteration per byte of the register; i is visible to the snippet.
    for (size_t i = 0; i < vlenb; i++) {
        %(memacc_code)s;
    }

    // NOTE(review): address and size are passed as (EA, vlenb) here, while
    // the VsWholeMicroExecute template passes (vlenb, EA) to
    // writeMemAtomicLE. Confirm both orders against the helper signatures.
    Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem),
                                   EA, vlenb, memAccessFlags, nullptr);
    return fault;
}

}};

// completeAcc() template for a whole-register vector store micro-op. Nothing
// is done with the packet or the execution context; NoFault is always
// returned.
def template VsWholeMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    return NoFault;
}

}};

// Constructor template for whole-register vector load macro instructions.
// One load micro-op is created for each of the nf + 1 registers, every one
// of them covering a full register worth of elements.
def template VlWholeConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements that fit into one vector register at the encoded width.
    const int32_t elems_per_reg = vlen / width_EEW(_machInst.width);
    // Number of micro-ops to build, taken from the nf field.
    size_t reg_count = machInst.nf + 1;

    StaticInstPtr load_uop;
    int reg = 0;
    while (reg < reg_count) {
        load_uop = new %(class_name)sMicro(_machInst, elems_per_reg, reg,
                                           elen, vlen);
        load_uop->setDelayedCommit();
        load_uop->setFlag(IsLoad);
        this->microops.push_back(load_uop);
        ++reg;
    }

    this->microops.front()->setFirstMicroop();
    this->microops.back()->setLastMicroop();
}

}};

// Declaration template for the micro-op class of a whole-register vector
// load.
def template VlWholeMicroDeclare {{

class %(class_name)s: public %(base_class)s
{
private:
    // Register id storage. The VlWholeMicroConstructor template registers
    // one destination (vd) and one source (rs1).
    RegId destRegIdxArr[1];
    RegId srcRegIdxArr[1];
public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _elen, uint32_t _vlen);

    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                        trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor template for a whole-register vector load micro-op. It
// registers vd + _microIdx as destination and rs1 as source, and flags the
// micro-op as a vector load. The name handed to the base class is the
// mnemonic followed by _micro.
def template VlWholeMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                               uint32_t _microIdx, uint32_t _elen,
                               uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, _microVl,
                   _microIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Destination: the register of the vd group selected by _microIdx.
    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
    _numTypedDestRegs[VecRegClass]++;
    // Source 0: integer register rs1.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    this->flags[IsVector] = true;
    this->flags[IsLoad] = true;
}

}};

// execute() template for a whole-register vector load micro-op: checks that
// the vector unit is usable, marks the vector state dirty, reads one full
// register from memory with readMemAtomicLE and runs the memacc_code snippet
// once per element of the register.
// Returns an IllegalInstFault when RVV is disabled or the vector unit is
// off, the memory fault if the read fails, and NoFault otherwise. vill is
// not checked here.
def template VlWholeMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    // Record in the status register that the vector state is now dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    Fault fault = readMemAtomicLE(xc, traceData, EA,
                            *(vreg_t::Container*)(&Mem), vlenb,
                            memAccessFlags);
    if (fault != NoFault)
        return fault;

    // One iteration per element of the register; i is visible to the
    // snippet.
    size_t elem_per_reg = vlen / width_EEW(machInst.width);
    for (size_t i = 0; i < elem_per_reg; i++) {
        %(memacc_code)s;
    }

    %(op_wb)s;
    return NoFault;
}

}};

// initiateAcc() template for a whole-register vector load micro-op: checks
// that the vector unit is usable and starts a read of vlenb bytes at EA.
// Returns an IllegalInstFault when RVV is disabled or the vector unit is
// off, otherwise the result of initiateMemRead. vill is not checked here.
def template VlWholeMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    %(op_src_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;

    // Every byte of the register-sized access is enabled.
    const std::vector<bool> byte_enable(vlenb, true);
    Fault fault = initiateMemRead(xc, EA, vlenb, memAccessFlags, byte_enable);
    return fault;
}

}};

// completeAcc() template for a whole-register vector load micro-op: marks
// the vector state dirty, copies the packet payload into Mem and runs the
// memacc_code snippet once per element of the register.
// Always returns NoFault.
def template VlWholeMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
        trace::InstRecord* traceData) const
{
    %(op_decl)s;
    %(op_rd)s;

    // Record in the status register that the vector state is now dirty.
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // The payload is copied unconditionally, using the size of the packet.
    memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());

    // One iteration per element of the register; i is visible to the
    // snippet.
    size_t elem_per_reg = vlen / width_EEW(machInst.width);
    for (size_t i = 0; i < elem_per_reg; ++i) {
        %(memacc_code)s;
    }

    %(op_wb)s;
    return NoFault;
}

}};

// Constructor template for strided vector load macro instructions.
// The micro-op list is built in this order: a single VectorNopMicroInst when
// vl is zero, then one VPinVdMicroInst per destination register touched,
// then one load micro-op per element (identified by the register index i and
// the element index j inside that register).
def template VlStrideConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements that fit into one vector register at the encoded width.
    const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
    int32_t remaining_vl = this->vl;
    // Elements handled in the current register, at most one register worth.
    int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
    StaticInstPtr microop;

    if (micro_vl == 0) {
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    }

    // Number of destination registers touched: vl divided by the register
    // capacity, rounded up.
    const uint8_t num_pinvd_microops = ceil((float) this->vl /
                                                    num_elems_per_vreg);
    for (uint32_t i = 0; i < num_pinvd_microops; i++) {
        // A full register worth of elements, except for a partially filled
        // last register which gets the remainder.
        uint32_t vdNumElems = (vl >= num_elems_per_vreg*(i+1))
                              ? num_elems_per_vreg : vl-num_elems_per_vreg*i;
        microop = new VPinVdMicroInst(machInst, i, vdNumElems, elen, vlen);
        microop->setFlag(IsDelayedCommit);
        this->microops.push_back(microop);
    }

    // Outer loop walks the registers, inner loop the elements of a register.
    for (int i = 0; micro_vl > 0; ++i) {
        for (int j = 0; j < micro_vl; ++j) {
            microop = new %(class_name)sMicro(machInst, i, j, micro_vl, elen,
                                              vlen);
            microop->setFlag(IsDelayedCommit);
            microop->setFlag(IsLoad);
            this->microops.push_back(microop);
        }
        remaining_vl -= num_elems_per_vreg;
        micro_vl = std::min(remaining_vl, num_elems_per_vreg);
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}

}};

// Declaration template for the per-element micro-op class of a strided
// vector load.
def template VlStrideMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // rs1, rs2, vtmp0, vm
    RegId srcRegIdxArr[4];
    RegId destRegIdxArr[1];
public:
    // Note the argument order: register index, element index inside that
    // register, then the element count of the register.
    %(class_name)s(ExtMachInst _machInst, uint32_t _regIdx, uint32_t _microIdx,
                   uint32_t _microVl, uint32_t _elen, uint32_t _vlen);

    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor template for the per-element micro-op of a strided vector
// load. It registers vd + _regIdx as destination, rs1, rs2 and the internal
// vector register as sources, plus v0 for masked instructions, and flags the
// micro-op as a load.
def template VlStrideMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _regIdx,
                               uint32_t _microIdx, uint32_t _microVl,
                               uint32_t _elen, uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _regIdx, _microIdx,
                   _microVl, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Destination: the register of the vd group selected by _regIdx.
    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]);
    _numTypedDestRegs[VecRegClass]++;
    // Sources 0 and 1: integer registers rs1 and rs2.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
    // vtmp0 as dummy src reg to create dependency with pin vd micro
    setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0]);
    // Masked instructions read v0 as their last source operand.
    if (!_machInst.vm) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsLoad] = true;
}

}};

// execute() template for the per-element micro-op of a strided vector load:
// checks that the vector unit is usable, marks the vector state dirty and,
// if the element is active, reads one element from memory at EA and runs the
// memacc_code snippet.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off or
// vill is set, the memory fault if the read fails, and NoFault otherwise.
def template VlStrideMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // Record in the status register that the vector state is now dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    constexpr uint8_t elem_size = sizeof(Vd[0]);
    %(ea_code)s; // ea_code depends on elem_size

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // A single element is accessed, with all of its bytes enabled.
    uint32_t mem_size = elem_size;
    const std::vector<bool> byte_enable(mem_size, true);

    // Element index across the register group: elements per register times
    // the register index, plus the position inside the register.
    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
                                memAccessFlags, byte_enable);
        if (fault != NoFault)
            return fault;
        %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
    }

    %(op_wb)s;
    return fault;
}

}};

// initiateAcc() template for the per-element micro-op of a strided vector
// load: checks that the vector unit is usable and starts a read of one
// element at EA. The read is always issued; for an inactive element all of
// its bytes are disabled instead.
// Returns an IllegalInstFault when RVV is disabled, the vector unit is off or
// vill is set, otherwise the result of initiateMemRead.
def template VlStrideMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    %(op_src_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    constexpr uint8_t elem_size = sizeof(Vd[0]);
    %(ea_code)s; // ea_code depends on elem_size

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    uint32_t mem_size = elem_size;
    // Element index across the register group: elements per register times
    // the register index, plus the position inside the register.
    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
    // Unmasked instructions load every element, masked ones only the
    // elements selected by elem_mask.
    bool need_load = machInst.vm || elem_mask(v0, ei);
    const std::vector<bool> byte_enable(mem_size, need_load);
    fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
    return fault;
}

}};

// completeAcc() template for the per-element micro-op of a strided vector
// load: marks the vector state dirty and, if the element is active, copies
// the packet payload into Mem and runs the memacc_code snippet.
// Always returns NoFault.
def template VlStrideMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
                            trace::InstRecord *traceData) const
{
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;

    // Record in the status register that the vector state is now dirty.
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // Copy of the mask register and a byte view of it; both are only filled
    // in for masked instructions (vm clear), where v0 is the last source.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // Element index across the register group: elements per register times
    // the register index, plus the position inside the register.
    size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
        %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
    }

    %(op_wb)s;
    return NoFault;
}

}};

// Constructor template for strided vector store macro instructions.
// One store micro-op is created per element (identified by the register
// index i and the element index j inside that register); a single
// VectorNopMicroInst is emitted instead when vl is zero.
def template VsStrideConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements that fit into one vector register at the encoded width.
    const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
    int32_t remaining_vl = this->vl;
    // Elements handled in the current register, at most one register worth.
    int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
    StaticInstPtr microop;

    if (micro_vl == 0) {
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    }
    // Outer loop walks the registers, inner loop the elements of a register.
    for (int i = 0; micro_vl > 0; ++i) {
        for (int j = 0; j < micro_vl; ++j) {
            microop = new %(class_name)sMicro(machInst, i, j, micro_vl, elen,
                                              vlen);
            microop->setFlag(IsDelayedCommit);
            microop->setFlag(IsStore);
            this->microops.push_back(microop);
        }
        remaining_vl -= num_elems_per_vreg;
        micro_vl = std::min(remaining_vl, num_elems_per_vreg);
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}

}};

// Declaration template for the per-element micro-op class of a strided
// vector store.
def template VsStrideMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // rs1, rs2, vs3, vm
    RegId srcRegIdxArr[4];
    // A store has no destination register, hence the zero-length array.
    RegId destRegIdxArr[0];
public:
    // Note the argument order: register index, element index inside that
    // register, then the element count of the register.
    %(class_name)s(ExtMachInst _machInst, uint32_t _regIdx, uint32_t _microIdx,
                   uint32_t _microVl, uint32_t _elen, uint32_t _vlen);
    // Memory access entry points, each overriding a virtual of the base.
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    // Make the generateDisassembly name of the base class visible here.
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor of the strided-store micro-op: registers its source operands
// and marks it as a store. The mnemonic passed to the base class carries a
// _micro suffix.
def template VsStrideMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _regIdx,
                               uint32_t _microIdx, uint32_t _microVl,
                               uint32_t _elen, uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, _regIdx,
                   _microIdx, _microVl, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Registration order defines the source operand indices: rs1, rs2, then
    // the data register vs3 + _regIdx.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]);
    if (!_machInst.vm) {
        // Masked form: v0 is registered last, so the execute templates can
        // fetch it as source operand _numSrcRegs - 1.
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsStore] = true;
}

}};

// Atomic-mode execute of one strided-store micro-op. After the legality
// checks it writes a single element to memory unless the element is masked
// off. Returns an IllegalInstFault when the checks fail, otherwise the
// result of the memory write (NoFault if no write was issued).
def template VsStrideMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    constexpr uint8_t elem_size = sizeof(Vs3[0]);
    %(ea_code)s;

    // v0 is only assigned for masked instructions; the short-circuit in the
    // condition below keeps elem_mask from reading it otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // A single element is transferred, with every byte enabled.
    uint32_t mem_size = elem_size;
    const std::vector<bool> byte_enable(mem_size, true);

    // Element index used for the mask lookup: elements held by the
    // preceding registers plus the position inside this register.
    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        %(memacc_code)s;
        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
                             memAccessFlags, nullptr, byte_enable);
    }
    return fault;
}

}};

// Timing-mode initiateAcc of one strided-store micro-op. Performs the same
// legality checks as execute and issues the write of one element unless the
// element is masked off.
def template VsStrideMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    // v0 is only assigned for masked instructions; need_store below is
    // computed with a short-circuit so it is never read otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    constexpr uint8_t elem_size = sizeof(Vs3[0]);
    %(ea_code)s;

    uint32_t mem_size = elem_size;

    // Element index used for the mask lookup.
    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
    bool need_store = machInst.vm || elem_mask(v0, ei);
    if (need_store) {
        // need_store is true here, so every byte of the element is enabled.
        const std::vector<bool> byte_enable(mem_size, need_store);
        %(memacc_code)s;
        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
                            memAccessFlags, nullptr, byte_enable);
    }
    return fault;
}

}};

// Timing-mode completeAcc of a strided-store micro-op: no work is done on
// completion, the function only reports NoFault.
def template VsStrideMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    return NoFault;
}

}};

// Macro-op constructor for indexed vector loads. The micro-op list is made
// of a prologue (one VCpyVsMicroInst plus one VPinVdMicroInst per destination
// register touched) followed by one load micro-op per element. A single nop
// micro-op is emitted first when there is no element to load.
def template VlIndexConstructor {{

template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                                         uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Byte widths of a data (destination) element and of an index element.
    const uint32_t data_bytes = sizeof(ElemType);
    const uint32_t index_bytes = width_EEW(_machInst.width) / 8;
    // Rounded-up width ratios: how many consecutive element groups map to
    // the same index register, respectively the same data register.
    const uint8_t groups_per_index_reg =
        (data_bytes + index_bytes - 1) / index_bytes;
    const uint8_t groups_per_data_reg =
        (index_bytes + data_bytes - 1) / data_bytes;
    const uint32_t vreg_bytes = vlen >> 3;
    // Elements per group, bounded by the wider of the two element types.
    const int32_t group_cap = vreg_bytes / std::max(data_bytes, index_bytes);
    int32_t elems_left = this->vl;
    int32_t group_len = std::min(elems_left, group_cap);
    StaticInstPtr uop;

    if (group_len == 0) {
        // No element to load: emit a single nop micro-op.
        uop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(uop);
    }

    // Prologue, one pair of micro-ops per destination register touched.
    // NOTE(review): presumably VCpyVsMicroInst stages vs2 in an internal
    // register and VPinVdMicroInst pins vd; confirm against their
    // definitions, which are not part of this template.
    const uint32_t data_cap = vreg_bytes / data_bytes;
    const uint8_t prologue_regs = ceil((float) this->vl/data_cap);
    for (uint32_t reg = 0; reg < prologue_regs; reg++) {
        // Full registers get data_cap elements, the last one the remainder.
        uint32_t pinned_elems;
        if (vl >= data_cap * (reg + 1)) {
            pinned_elems = data_cap;
        } else {
            pinned_elems = vl - data_cap * reg;
        }

        uop = new VCpyVsMicroInst(machInst, reg, machInst.vs2, elen, vlen);
        uop->setFlag(IsDelayedCommit);
        this->microops.push_back(uop);

        uop = new VPinVdMicroInst(machInst, reg, pinned_elems, elen, vlen);
        uop->setFlag(IsDelayedCommit);
        this->microops.push_back(uop);
    }

    uint32_t group = 0;
    while (group_len > 0) {
        // Register numbers and base element offsets are shared by every
        // element of the group.
        const uint32_t data_reg = group / groups_per_data_reg;
        const uint32_t index_reg = group / groups_per_index_reg;
        const uint32_t data_base = group_cap * (group % groups_per_data_reg);
        const uint32_t index_base =
            group_cap * (group % groups_per_index_reg);
        for (uint32_t elem = 0; elem < group_len; ++elem) {
            uop = new %(class_name)sMicro<ElemType>(machInst, data_reg,
                                                    elem + data_base,
                                                    index_reg,
                                                    elem + index_base,
                                                    elen, vlen);
            uop->setFlag(IsDelayedCommit);
            uop->setFlag(IsLoad);
            this->microops.push_back(uop);
        }
        elems_left -= group_cap;
        group_len = std::min(elems_left, group_cap);
        group++;
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}

%(declare_vmem_template)s;

}};

// Class declaration for the per-element micro-op of an indexed vector load.
// ElemType is the type of the loaded (destination) elements.
def template VlIndexMicroDeclare {{

template<typename ElemType>
class %(class_name)s : public %(base_class)s
{
private:
    // rs1, vs2, vm
    RegId srcRegIdxArr[3];
    // Single destination: the vd register selected by the micro-op.
    RegId destRegIdxArr[1];
public:
    // _vdRegIdx / _vdElemIdx:   destination register offset and element
    // _vs2RegIdx / _vs2ElemIdx: index register offset and element
    %(class_name)s(ExtMachInst _machInst,
        uint32_t _vdRegIdx, uint32_t _vdElemIdx,
        uint32_t _vs2RegIdx, uint32_t _vs2ElemIdx,
        uint32_t _elen, uint32_t _vlen);

    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor of the indexed-load micro-op: registers the destination and
// source operands and marks the micro-op as a load.
def template VlIndexMicroConstructor {{

template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(
    ExtMachInst _machInst,uint32_t _vdRegIdx, uint32_t _vdElemIdx,
    uint32_t _vs2RegIdx, uint32_t _vs2ElemIdx, uint32_t _elen, uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vdRegIdx,
                   _vdElemIdx, _vs2RegIdx, _vs2ElemIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Destination: vd offset by the register index of this micro-op.
    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
    _numTypedDestRegs[VecRegClass]++;
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // The indices are read from an internal vector register rather than
    // from vs2 directly.
    // NOTE(review): presumably filled by the VCpyVsMicroInst micro-ops the
    // macro-op constructor emits first; confirm.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _vs2RegIdx]);
    if (!_machInst.vm) {
        // Masked form: v0 is registered last, so the execute templates can
        // fetch it as source operand _numSrcRegs - 1.
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsLoad] = true;
}

%(declare_vmem_template)s;

}};

// Atomic-mode execute of one indexed-load micro-op. After the legality
// checks it marks the vector state dirty, reads one element from memory
// unless the element is masked off, and writes back the destination.
def template VlIndexMicroExecute {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::execute(ExecContext *xc,
    trace::InstRecord *traceData)const
{
    using vu = std::make_unsigned_t<ElemType>;
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // The micro-op writes a vector register, so flag the vector state dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;
    constexpr uint8_t elem_size = sizeof(Vd[0]);
    // v0 is only assigned for masked instructions; the short-circuit in the
    // condition below keeps elem_mask from reading it otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // A single element is transferred, with every byte enabled.
    uint32_t mem_size = elem_size;
    const std::vector<bool> byte_enable(mem_size, true);
    // Element index used for the mask lookup.
    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
                                memAccessFlags, byte_enable);
        // A faulting read returns before the destination is written back.
        if (fault != NoFault)
            return fault;
        %(memacc_code)s; /* Vd[this->vdElemIdx] = Mem[0]; */
    }

    %(op_wb)s;
    return fault;
}

}};

// Timing-mode initiateAcc of one indexed-load micro-op. Performs the
// legality checks and issues the read of one element. The read is issued
// even for a masked-off element; in that case every byte-enable bit is
// false.
def template VlIndexMicroInitiateAcc {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    using vu = std::make_unsigned_t<ElemType>;
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    %(op_src_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    constexpr uint8_t elem_size = sizeof(Vd[0]);
    %(ea_code)s; // ea_code depends on elem_size

    // v0 is only assigned for masked instructions; need_load below is
    // computed with a short-circuit so it is never read otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    uint32_t mem_size = elem_size;

    // Element index used for the mask lookup.
    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
    bool need_load = machInst.vm || elem_mask(v0, ei);
    // All bytes enabled for an active element, none for a masked-off one.
    const std::vector<bool> byte_enable(mem_size, need_load);
    fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
    return fault;
}

}};

// Timing-mode completeAcc of one indexed-load micro-op. Marks the vector
// state dirty, copies the packet payload into Mem for an active element,
// runs the memory-access snippet and writes back the destination.
def template VlIndexMicroCompleteAcc {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::completeAcc(PacketPtr pkt, ExecContext *xc,
                            trace::InstRecord *traceData) const
{
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    using vu = std::make_unsigned_t<ElemType>;
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;

    constexpr uint8_t elem_size = sizeof(Vd[0]);

    // v0 is only assigned for masked instructions; the short-circuit in the
    // condition below keeps elem_mask from reading it otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // Same element index as computed when the access was initiated.
    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        // Only an active element consumes the data carried by the packet.
        memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
        %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
    }

    %(op_wb)s;
    return NoFault;
}

%(declare_vmem_template)s;

}};

// Macro-op constructor for indexed vector stores. The instruction is
// expanded into one store micro-op per element; when there is no element to
// store a single nop micro-op is emitted instead.
def template VsIndexConstructor {{

template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                                         uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Byte widths of a data (stored) element and of an index element.
    const uint32_t data_bytes = sizeof(ElemType);
    const uint32_t index_bytes = width_EEW(_machInst.width) / 8;
    // Rounded-up width ratios: how many consecutive element groups map to
    // the same index register, respectively the same data register.
    const uint8_t groups_per_index_reg =
        (data_bytes + index_bytes - 1) / index_bytes;
    const uint8_t groups_per_data_reg =
        (index_bytes + data_bytes - 1) / data_bytes;
    const uint32_t vreg_bytes = vlen >> 3;
    // Elements per group, bounded by the wider of the two element types.
    const int32_t group_cap = vreg_bytes / std::max(data_bytes, index_bytes);
    int32_t elems_left = this->vl;
    int32_t group_len = std::min(elems_left, group_cap);
    StaticInstPtr uop;

    if (group_len == 0) {
        // No element to store: emit a single nop micro-op.
        uop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(uop);
    }

    uint32_t group = 0;
    while (group_len > 0) {
        // Register numbers and base element offsets are shared by every
        // element of the group.
        const uint32_t data_reg = group / groups_per_data_reg;
        const uint32_t index_reg = group / groups_per_index_reg;
        const uint32_t data_base = group_cap * (group % groups_per_data_reg);
        const uint32_t index_base =
            group_cap * (group % groups_per_index_reg);
        for (uint32_t elem = 0; elem < group_len; ++elem) {
            uop = new %(class_name)sMicro<ElemType>(machInst, data_reg,
                                                    elem + data_base,
                                                    index_reg,
                                                    elem + index_base,
                                                    elen, vlen);
            uop->setFlag(IsDelayedCommit);
            uop->setFlag(IsStore);
            this->microops.push_back(uop);
        }
        elems_left -= group_cap;
        group_len = std::min(elems_left, group_cap);
        group++;
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}

%(declare_vmem_template)s;

}};

// Class declaration for the per-element micro-op of an indexed vector store.
// ElemType is the type of the stored (vs3) elements.
def template VsIndexMicroDeclare {{

template<typename ElemType>
class %(class_name)s : public %(base_class)s
{
private:
    // rs1, vs2, vs3, vm
    RegId srcRegIdxArr[4];
    // A store micro-op registers no destination operand.
    RegId destRegIdxArr[0];
public:
    // _vs3RegIdx / _vs3ElemIdx: data register offset and element
    // _vs2RegIdx / _vs2ElemIdx: index register offset and element
    %(class_name)s(ExtMachInst _machInst, uint32_t _vs3RegIdx,
                   uint32_t _vs3ElemIdx, uint32_t _vs2RegIdx,
                   uint32_t _vs2ElemIdx, uint32_t _elen, uint32_t _vlen);

    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor of the indexed-store micro-op: registers its source operands
// and marks the micro-op as a store.
def template VsIndexMicroConstructor {{

template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
                                         uint32_t _vs3RegIdx,
                                         uint32_t _vs3ElemIdx,
                                         uint32_t _vs2RegIdx,
                                         uint32_t _vs2ElemIdx,
                                         uint32_t _elen, uint32_t _vlen)
  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vs3RegIdx,
                   _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    // Registration order defines the source operand indices: rs1, the index
    // register vs2 + _vs2RegIdx, then the data register vs3 + _vs3RegIdx.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
    // We treat agnostic as undisturbed
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]);
    if (!_machInst.vm) {
        // Masked form: v0 is registered last, so the execute templates can
        // fetch it as source operand _numSrcRegs - 1.
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsStore] = true;
}

%(declare_vmem_template)s;

}};

// Atomic-mode execute of one indexed-store micro-op. After the legality
// checks it writes a single element to memory unless the element is masked
// off. Returns an IllegalInstFault when the checks fail, otherwise the
// result of the memory write (NoFault if no write was issued).
def template VsIndexMicroExecute {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::execute(ExecContext *xc,
    trace::InstRecord *traceData)const
{
    using vu = std::make_unsigned_t<ElemType>;
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;
    constexpr uint8_t elem_size = sizeof(Vs3[0]);
    // v0 is only assigned for masked instructions; the short-circuit in the
    // condition below keeps elem_mask from reading it otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // A single element is transferred, with every byte enabled.
    uint32_t mem_size = elem_size;
    const std::vector<bool> byte_enable(mem_size, true);

    // Element index used for the mask lookup.
    size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
                             memAccessFlags, nullptr, byte_enable);
    }
    return fault;
}

}};

// Timing-mode initiateAcc of one indexed-store micro-op. Performs the same
// legality checks as execute and issues the write of one element unless the
// element is masked off.
def template VsIndexMicroInitiateAcc {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    using vu = std::make_unsigned_t<ElemType>;
    Fault fault = NoFault;
    Addr EA;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlenb)s;
    %(ea_code)s;
    constexpr uint8_t elem_size = sizeof(Vs3[0]);
    // v0 is only assigned for masked instructions; the short-circuit in the
    // condition below keeps elem_mask from reading it otherwise.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if (!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // A single element is transferred, with every byte enabled.
    constexpr uint8_t mem_size = elem_size;
    const std::vector<bool> byte_enable(mem_size, true);

    // Element index used for the mask lookup.
    size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
    if (machInst.vm || elem_mask(v0, ei)) {
        %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
                             memAccessFlags, nullptr, byte_enable);
    }
    return fault;
}

}};

// Timing-mode completeAcc of an indexed-store micro-op: no work is done on
// completion, the function only reports NoFault.
def template VsIndexMicroCompleteAcc {{

template<typename ElemType>
Fault
%(class_name)s<ElemType>::completeAcc(PacketPtr pkt, ExecContext* xc,
                            trace::InstRecord* traceData) const
{
    return NoFault;
}

%(declare_vmem_template)s;

}};

// Macro-op constructor for segment vector loads. For a non-empty access two
// passes of micro-ops are emitted, each iterating over every field and every
// register of that field: first the load micro-ops, then the
// VlSegDeIntrlvMicroInst micro-ops targeting vd. An empty access yields a
// single nop micro-op.
def template VlSegConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements of the encoded width that fit in one vector register.
    const int32_t reg_cap = vlen / width_EEW(_machInst.width);
    // Registers (and thus micro-ops per pass) needed for one field.
    const uint32_t regs_per_field = ceil((float) this->vl / (reg_cap));
    const size_t num_fields = machInst.nf + 1;
    const uint32_t elem_bytes = width_EEW(_machInst.width) / 8;
    int32_t elems_left = this->vl;
    int32_t chunk = std::min(elems_left, reg_cap);
    StaticInstPtr uop;

    if (chunk != 0) {
        // Pass 1: the memory-reading micro-ops, field by field.
        for (int fld = 0; fld < num_fields; ++fld) {
            elems_left = this->vl;
            chunk = std::min(elems_left, reg_cap);
            for (int reg = 0; reg < regs_per_field && chunk > 0; ++reg) {
                uop = new %(class_name)sMicro(_machInst, chunk, reg,
                                              regs_per_field, fld,
                                              num_fields, elen, vlen);
                uop->setDelayedCommit();
                uop->setFlag(IsLoad);
                this->microops.push_back(uop);
                elems_left -= reg_cap;
                chunk = std::min(elems_left, reg_cap);
            }
        }
        // Pass 2: the de-interleave micro-ops, in the same field/register
        // order, each targeting its own register starting at vd.
        for (int fld = 0; fld < num_fields; ++fld) {
            elems_left = this->vl;
            chunk = std::min(elems_left, reg_cap);
            for (int reg = 0; reg < regs_per_field && chunk > 0; ++reg) {
                uop = new VlSegDeIntrlvMicroInst(_machInst, chunk,
                    _machInst.vd + reg + (fld * regs_per_field), num_fields,
                    reg, regs_per_field, fld, elen, vlen, elem_bytes);
                this->microops.push_back(uop);
                elems_left -= reg_cap;
                chunk = std::min(elems_left, reg_cap);
            }
        }
    } else {
        // No element to load: emit a single nop micro-op.
        uop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(uop);
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}
}};

// Class declaration for the load micro-op of a segment vector load.
def template VlSegMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // rs1, vd, vm
    RegId srcRegIdxArr[3];
    RegId destRegIdxArr[1];
    // Field handled by this micro-op, total number of fields, and number of
    // micro-ops per field; all three are set by the constructor.
    uint32_t field;
    uint32_t numFields;
    uint32_t numMicroops;
public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field,
                   uint32_t _numFields, uint32_t _elen, uint32_t _vlen);
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor of the segment-load micro-op: stores the field bookkeeping and
// registers the destination and source operands.
def template VlSegMicroConstructor {{

    %(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                                   uint32_t _microIdx, uint32_t _numMicroops,
                                   uint32_t _field, uint32_t _numFields,
                                   uint32_t _elen, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                     _microIdx , _numMicroops, _field, _numFields, _elen,
                     _vlen)
{
    %(set_reg_idx_arr)s;

    _numSrcRegs = 0;
    _numDestRegs = 0;
    field = _field;
    numFields = _numFields;
    numMicroops = _numMicroops;
    // The loaded data goes to an internal vector register, one per
    // (field, micro-op) pair, not to vd itself.
    setDestRegIdx(_numDestRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
        (field * numMicroops)]);
    _numTypedDestRegs[VecRegClass]++;
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // The matching vd register is also a source unless the tail policy is
    // agnostic and the instruction is either unmasked or mask-agnostic.
    if (!_machInst.vtype8.vta || (!_machInst.vm && !_machInst.vtype8.vma)) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx
                                                + (field * numMicroops)]);
    }
    if (!_machInst.vm) {
        // Masked form: v0 is registered last, so the execute templates can
        // fetch it as source operand _numSrcRegs - 1.
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
}

}};

// Atomic-mode execute of one segment-load micro-op. After the legality
// checks it marks the vector state dirty, reads microVl elements of the
// encoded width from memory in one access, runs the memory-access snippet
// for every element slot of the register and writes back the destination.
// Returns an IllegalInstFault when a check fails, or the fault reported by
// the memory read.
def template VlSegMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    // Bytes transferred by this micro-op: element width times microVl.
    uint32_t mem_size = width_EEW(machInst.width) / 8 * microVl;

    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;

    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);

    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // vlmul is held in a signed type. A negative value (presumably a
    // fractional LMUL) must not be used as a shift count: that is undefined
    // behaviour and could raise a spurious fault. Such a register group is
    // smaller than one register, so only non-negative values are checked.
    // NOTE(review): the check uses LMUL; confirm whether EMUL is intended.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    if (vlmul >= 0 && ((1 << vlmul) * this->numFields) > 8) {
        return std::make_shared<IllegalInstFault>(
            "LMUL value is illegal for vlseg inst", machInst);
    }

    // The micro-op writes a vector register, so flag the vector state dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // v0 is only assigned for masked instructions.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
                              byte_enable);

    if (fault != NoFault)
        return fault;

    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
    const size_t micro_elems = vlen / width_EEW(machInst.width);

    size_t ei;

    // The snippet runs once per element slot of the register; ei is the
    // element index it sees for slot i.
    for (size_t i = 0; i < micro_elems; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return fault;
}

}};

// Timing-mode initiateAcc of one segment-load micro-op. Performs the
// legality checks and issues a single read covering microVl elements of the
// encoded width, with every byte enabled. Returns an IllegalInstFault when a
// check fails, or the result of initiating the read.
def template VlSegMicroInitiateAcc {{

Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{

    Addr EA;
    // Bytes transferred by this micro-op: element width times microVl.
    uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;

    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;

    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    // vlmul is held in a signed type. A negative value (presumably a
    // fractional LMUL) must not be used as a shift count: that is undefined
    // behaviour and could raise a spurious fault. Such a register group is
    // smaller than one register, so only non-negative values are checked.
    // NOTE(review): the check uses LMUL; confirm whether EMUL is intended.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    if (vlmul >= 0 && ((1 << vlmul) * this->numFields) > 8) {
        return std::make_shared<IllegalInstFault>(
            "LMUL value is illegal for vlseg inst", machInst);
    }

    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
                                  byte_enable);
    return fault;
}

}};

// Timing-mode completeAcc of one segment-load micro-op. Marks the vector
// state dirty, copies the packet payload into Mem, runs the memory-access
// snippet for every element slot of the register and writes back the
// destination.
def template VlSegMicroCompleteAcc {{

Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
                            trace::InstRecord *traceData) const
{
    %(op_decl)s;
    %(op_rd)s;

    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // tail/mask policy: both undisturbed if one is, 1s if none
    %(tail_mask_policy_code)s

    // v0 is only assigned for masked instructions.
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    // Unlike the per-element load templates, the payload is copied
    // regardless of the mask.
    memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());

    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
    const size_t micro_elems = vlen / width_EEW(machInst.width);

    // The snippet runs once per element slot of the register; ei is the
    // element index it sees for slot i.
    size_t ei;
    for (size_t i = 0; i < micro_elems; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return NoFault;
}

}};

// Macro-op constructor for segment vector stores. For a non-empty access two
// passes of micro-ops are emitted, each iterating over every field and every
// register of that field: first the VsSegIntrlvMicroInst micro-ops reading
// from vs3, then the store micro-ops. An empty access yields a single nop
// micro-op.
def template VsSegConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen,
                               uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _elen, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements of the encoded width that fit in one vector register.
    const int32_t reg_cap = vlen / width_EEW(_machInst.width);
    // Registers (and thus micro-ops per pass) needed for one field.
    const uint32_t regs_per_field = ceil((float) this->vl / (reg_cap));
    const size_t num_fields = machInst.nf + 1;
    const uint32_t elem_bytes = width_EEW(_machInst.width) / 8;
    int32_t elems_left = this->vl;
    int32_t chunk = std::min(elems_left, reg_cap);
    StaticInstPtr uop;

    if (chunk != 0) {
        // Pass 1: the interleave micro-ops, field by field.
        for (int fld = 0; fld < num_fields; ++fld) {
            elems_left = this->vl;
            chunk = std::min(elems_left, reg_cap);
            for (int reg = 0; reg < regs_per_field && chunk > 0; ++reg) {
                uop = new VsSegIntrlvMicroInst(_machInst, chunk,
                    _machInst.vs3, num_fields, reg, regs_per_field, fld,
                    elen, vlen, elem_bytes);
                this->microops.push_back(uop);
                elems_left -= reg_cap;
                chunk = std::min(elems_left, reg_cap);
            }
        }
        // Pass 2: the memory-writing micro-ops, in the same field/register
        // order.
        for (int fld = 0; fld < num_fields; ++fld) {
            elems_left = this->vl;
            chunk = std::min(elems_left, reg_cap);
            for (int reg = 0; reg < regs_per_field && chunk > 0; ++reg) {
                uop = new %(class_name)sMicro(_machInst, chunk, reg,
                                              regs_per_field, fld,
                                              num_fields, elen, vlen);
                uop->setDelayedCommit();
                uop->setFlag(IsStore);
                this->microops.push_back(uop);
                elems_left -= reg_cap;
                chunk = std::min(elems_left, reg_cap);
            }
        }
    } else {
        // No element to store: emit a single nop micro-op.
        uop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(uop);
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}
}};

// Class declaration for the store micro-op of a segment vector store.
def template VsSegMicroDeclare {{

class %(class_name)s : public %(base_class)s
{
private:
    // Four source slots are reserved. The constructor template for this
    // micro-op registers rs1, one internal vector register and, for masked
    // forms, v0.
    RegId srcRegIdxArr[4];
    // One destination slot is reserved although the constructor template
    // registers no destination operand.
    RegId destRegIdxArr[1];
    // Field handled by this micro-op, total number of fields, and number of
    // micro-ops per field; all three are set by the constructor.
    uint32_t field;
    uint32_t numFields;
    uint32_t numMicroops;
public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field,
                   uint32_t _numFields, uint32_t _elen, uint32_t _vlen);
    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    using %(base_class)s::generateDisassembly;
};

}};

// Constructor of the segment-store micro-op: stores the field bookkeeping,
// registers the source operands and marks the micro-op as a vector store.
def template VsSegMicroConstructor {{

    %(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
                                   uint32_t _microIdx, uint32_t _numMicroops,
                                   uint32_t _field, uint32_t _numFields,
                                   uint32_t _elen, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                     _microIdx, _numMicroops, _field, _numFields, _elen, _vlen)
{
    %(set_reg_idx_arr)s;

    _numSrcRegs = 0;
    _numDestRegs = 0;
    field = _field;
    numFields = _numFields;
    numMicroops = _numMicroops;

    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // The data to store is read from an internal vector register, one per
    // (field, micro-op) pair.
    // NOTE(review): presumably filled by the VsSegIntrlvMicroInst micro-ops
    // the macro-op constructor emits first; confirm.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
            (field * numMicroops)]);

    if (!_machInst.vm) {
        // Masked form: v0 is registered last, so the execute templates can
        // fetch it as source operand _numSrcRegs - 1.
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsVector] = true;
    this->flags[IsStore] = true;
}

}};

// Atomic-mode execute of one segment-store micro-op. After the legality
// checks it runs the memory-access snippet for every active element,
// enables the bytes of those elements only, and issues a single write
// covering microVl elements of the encoded width. Returns an
// IllegalInstFault when a check fails, or the result of the memory write.
def template VsSegMicroExecute {{

Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;

    // Bytes per element and bytes transferred by this micro-op.
    const size_t eewb = width_EEW(machInst.width) / 8;
    const size_t mem_size = eewb * microVl;

    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    // v0 is only assigned for masked instructions; the short-circuit in the
    // loop condition below keeps elem_mask_vseg from reading it otherwise.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;

    // vlmul is held in a signed type. A negative value (presumably a
    // fractional LMUL) must not be used as a shift count: that is undefined
    // behaviour and could raise a spurious fault. Such a register group is
    // smaller than one register, so only non-negative values are checked.
    // NOTE(review): the check uses LMUL; confirm whether EMUL is intended.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    if (vlmul >= 0 && ((1 << vlmul) * this->numFields) > 8) {
        return std::make_shared<IllegalInstFault>(
            "LMUL value is illegal for vsseg inst", machInst);
    }

    const size_t micro_vlmax = vlen / width_EEW(machInst.width);

    // Bytes start disabled and are enabled element by element below.
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        ei = i + micro_vlmax * microIdx;
        if (machInst.vm || elem_mask_vseg(v0, ei + (field * microVl),
            this->numFields)) {
            %(memacc_code)s;
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }

    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
                         nullptr, byte_enable);
    return fault;
}

}};

def template VsSegMicroInitiateAcc {{

// initiateAcc() for one segment-store micro-op. Rejects the instruction
// when the vector unit is unavailable or vill is set, builds a per-byte
// enable mask from the mask register, and issues one write of microVl
// elements. Returns an IllegalInstFault on a failed check, otherwise the
// fault reported by writeMem().
Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{

    Addr EA;

    // Bytes per element and total bytes covered by this micro-op.
    const size_t eewb = width_EEW(machInst.width) / 8;
    const size_t mem_size = eewb * microVl;

    RiscvISA::vreg_t tmp_v0;
    // Only dereferenced when the instruction is masked (vm == 0); start
    // from nullptr so the pointer is never left indeterminate.
    uint8_t *v0 = nullptr;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    if (!machInst.vm) {
        // The mask register was appended as the last source operand.
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;

    // vlmul is signed; negative values presumably encode a fractional
    // LMUL (TODO confirm against vtype_vlmul). A negative shift count is
    // undefined behaviour and could trigger a bogus fault, so the product
    // is only evaluated for vlmul >= 0. A fractional LMUL cannot exceed
    // the limit on its own as long as numFields is at most 8.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    if (vlmul >= 0 && ((1 << vlmul) * this->numFields) > 8) {
        return std::make_shared<IllegalInstFault>(
            "LMUL value is illegal for vsseg inst", machInst);
    }

    const size_t micro_vlmax = vlen / width_EEW(machInst.width);

    // One flag per byte; only bytes of active elements are written.
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        ei = i + micro_vlmax * microIdx;
        if (machInst.vm || elem_mask_vseg(v0, ei + (field * microVl),
            this->numFields)) {
            %(memacc_code)s;
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }

    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
                         nullptr, byte_enable);
    return fault;
}

}};

def template VsSegMicroCompleteAcc {{

// Completion hook for the segment-store micro-op. The response packet is
// not inspected here; the hook unconditionally reports NoFault.
Fault
%(class_name)s::completeAcc(
    PacketPtr pkt, ExecContext *xc, trace::InstRecord *traceData) const
{
    return NoFault;
}

}};

// Decode block: instantiate the instruction class, forwarding the decoded
// machine instruction together with elen and vlen to its constructor.
def template VMemBaseDecodeBlock {{
    return new %(class_name)s(machInst, elen, vlen);
}};

def template VMemTemplateDecodeBlock {{

// Pick the element type of the templated instruction class from the vsew
// field of vtype: encodings 0 to 3 select 8, 16, 32 and 64 bit unsigned
// elements. Any other encoding is treated as unreachable.
switch(machInst.vtype8.vsew) {
    case 0b000:
        return new %(class_name)s<uint8_t>(machInst, elen, vlen);
    case 0b001:
        return new %(class_name)s<uint16_t>(machInst, elen, vlen);
    case 0b010:
        return new %(class_name)s<uint32_t>(machInst, elen, vlen);
    case 0b011:
        return new %(class_name)s<uint64_t>(machInst, elen, vlen);
    default:
        GEM5_UNREACHABLE;
}

}};

def template VMemSplitTemplateDecodeBlock {{

// Pick the element type of the templated instruction class from the vsew
// field of vtype: encodings 0 to 3 select 8, 16, 32 and 64 bit unsigned
// elements. Any other encoding is treated as unreachable.
switch(machInst.vtype8.vsew) {
    case 0b000:
        return new %(class_name)s<uint8_t>(machInst, elen, vlen);
    case 0b001:
        return new %(class_name)s<uint16_t>(machInst, elen, vlen);
    case 0b010:
        return new %(class_name)s<uint32_t>(machInst, elen, vlen);
    case 0b011:
        return new %(class_name)s<uint64_t>(machInst, elen, vlen);
    default:
        GEM5_UNREACHABLE;
}

}};
